////////////////////////////////////////////////////////////////////////////////
//////// PRESIDENTIAL ELECTIONS DATASET
////////////////////////////////////////////////////////////////////////////////

// Names of the flag variables
local flags "flag_two_round flag_inconsequential flag_inconsequential_note flag_plebiscite flag_unopposed flag_coup"

// Path to the database of each source (stored in the local <source>_data)
foreach src in AC AED EED PDA IFES USA Nohlen {
	local `src'_data "$project_path/data/1_input/elections/`src'/`src'_database.dta"
}
// The Wikipedia database sits in a folder whose name differs from its short name
local Wiki_data "$project_path/data/1_input/elections/Wikipedia/Wiki_database.dta"

// Sources ordered from highest to lowest priority
local priority_list "Nohlen AC AED USA EED PDA IFES Wiki"

////////////////////////////////////////////////////////////////////////////////
//////// a. Assigning a source to each election and merging flags
////////////////////////////////////////////////////////////////////////////////

// List of presidential elections in V-Dem: this is the base list to which the sources are attached
use "$project_path/data/2_intermediary/elections/Presidential elections/pres_elec_vdem", clear

// Initializing flags as missing (they are filled from the sources below, in priority order)
gen flag_inconsequential = . 
gen flag_inconsequential_note = ""
gen flag_coup = .
gen flag_plebiscite = . 
gen flag_unopposed = . 
gen flag_indirect = .
// flag_two_round is also filled from the sources below, but it is the only flag not created here.
// It is created only when the input file does not already provide it, so existing values are kept.
capture confirm variable flag_two_round
if _rc {
	gen flag_two_round = .
}

// For every source, attach to the running list of elections:
//   - available_<source>: 1 when the source names a first candidate for the election
//   - the flags of the source, renamed <flag>_<source>
foreach src in `priority_list' {
	// Set the running dataset aside so that it can be merged back below
	tempfile running
	save `running'

	use "``src'_data'", clear
	gen available_`src' = cond(Candidate_1 != "", 1, .)
	keep Country Year Month Type_Election available_`src' flag_*

	// Suffix every flag with the name of the source
	unab src_flags : flag_*
	foreach f of local src_flags {
		rename `f' `f'_`src'
	}

	merge 1:1 Country Year Month Type_Election using `running', nogen
}

// Assigning flags: each flag takes the value given by the first source of the priority list
// for which it is not missing; the source-specific copies are dropped once used
local numeric_flags flag_two_round flag_inconsequential flag_coup flag_plebiscite flag_unopposed flag_indirect
foreach flag of local numeric_flags {
	foreach src of local priority_list {
		replace `flag' = `flag'_`src' if `flag'==.
		drop `flag'_`src'
	}
}

foreach src of local priority_list {
	// Same rule for the (string) note attached to flag_inconsequential
	replace flag_inconsequential_note = flag_inconsequential_note_`src' if flag_inconsequential_note==""
	drop flag_inconsequential_note_`src'

	// No "wrong_sum_share" flag for indirect elections
	replace flag_wrong_sum_share_`src' = . if flag_indirect==1
}

// Flag for elections that are followed by another election in the same country and year
// (within a country-year, elections are ordered by month and all but the last one are flagged)
bysort Country Year (Month) : gen flag_not_last = 1 if _n != _N
// Assigning a source to each election. Sources are scanned in priority order and "best" tracks
// the state of the selection so far:
//   0 = no available source found yet
//   1 = an available source without the wrong_sum_share flag has been selected
//   2 = only available sources with the wrong_sum_share flag have been found (the first one is selected)
gen Source = ""
gen best = 0
foreach source in `priority_list' {
	replace Source = "`source'" if available_`source'==1 & flag_wrong_sum_share_`source'!=1 & (best==0 | best==2)
	replace Source = "`source'" if available_`source'==1 & flag_wrong_sum_share_`source'==1 & best==0
	replace best = 1 if available_`source'==1 & flag_wrong_sum_share_`source'!=1
	replace best = 2 if available_`source'==1 & best==0 & flag_wrong_sum_share_`source'==1
	// The source-specific wrong_sum_share flags are kept until the manual overrides are applied
	drop available_`source'
}

gen flag_wrong_sum_share = 1 if best==2
drop best
order Country Year Month Type_Election Source

// Manually changing the source when the one highest in the priority list has inaccurate or incomplete data
gen byte manual_wiki = 0
*Guatemala 2015: to get the entire list of candidates (including the representative of the incumbency), we select Wikipedia as a source instead of IFES
replace manual_wiki = 1 if Country=="Guatemala" & Year==2015
*Honduras 1985: Nohlen does not give the list of candidates, but only results by party
replace manual_wiki = 1 if Country=="Honduras" & Year==1985
*Panama 1984: results in Nohlen are wrong, the scores of the two top candidates are flipped
replace manual_wiki = 1 if Country=="Panama" & Year==1984
*Panama 1989: results in Nohlen are wrong, the scores of the two top candidates are flipped
replace manual_wiki = 1 if Country=="Panama" & Year==1989
*Sao Tome and Principe 2011: results from IFES are missing a candidate
replace manual_wiki = 1 if Country=="Sao Tome and Principe" & Year==2011

replace Source = "Wiki" if manual_wiki==1
// flag_wrong_sum_share was derived from the automatically selected source: for the elections
// switched to Wikipedia it must describe the Wikipedia results instead
replace flag_wrong_sum_share = cond(flag_wrong_sum_share_Wiki==1, 1, .) if manual_wiki==1
drop manual_wiki

// The source-specific flags are no longer needed
foreach source in `priority_list' {
	drop flag_wrong_sum_share_`source'
}

// Manual corrections to the flags
replace flag_two_round = . if Year==2003 & Country=="Montenegro"

// Bosnia-Herz has a collective presidency: its elections are left out
keep if Country!="Bosnia-Herz"

////////////////////////////////////////////////////////////////////////////////
//////// b. Gathering election results
////////////////////////////////////////////////////////////////////////////////

// Gathering the results of each election from the source assigned to it

tempfile selected_sources
save `selected_sources'

// One results file per source, restricted to the elections assigned to that source
foreach src of local priority_list {
	use `selected_sources', clear
	keep if Source=="`src'"
	// Every selected election must be found in the source; source elections that were not selected are discarded
	merge 1:1 Country Year Month Type_Election using "``src'_data'", assert(using match) keep(match) nogen
	// Flags come from the list of selected elections, not from the source files
	drop flag_* 
	tempfile results_`src'
	save `results_`src''
}

// Empty the dataset left in memory (its variables are kept) and stack the results of all sources
drop if Country!=""
foreach src of local priority_list {
	append using `results_`src''
}
tempfile results
save `results'

// Back to the full list of elections (with flags), to which the results are attached
use `selected_sources', clear
merge 1:1 Country Year Month Type_Election using `results', assert(master match) nogen

// Adding election dates
merge 1:1 Country Year Month Type_Election using "$project_path/data/2_intermediary/elections/Election dates/election_dates.dta", keep(master match) nogen

// Automatically defined variables: if, for a given round, the numbers of votes are available but
// no vote share is, the vote share of each candidate is set to 100 times its number of votes
// divided by the sum of the numbers of votes of all candidates in that round.

// Shares are widened to double before being filled in: computing them into float variables would
// round them, and a later recast to double cannot recover the lost precision
forvalues i=1/50 {
	recast double Vote_Share1_`i' Vote_Share2_`i' E_Vote_Share1_`i' E_Vote_Share2_`i' E_Vote_Share3_`i'
}

// Number of candidates with a non-missing value, for each variable and round
// (missing() is used so that extended missing values are not counted as available)
local counted Votes1 Votes2 Vote_Share1 Vote_Share2 E_Votes1 E_Votes2 E_Votes3 E_Vote_Share1 E_Vote_Share2 E_Vote_Share3
foreach stub of local counted {
	gen nb_`stub' = 0
	forvalues i=1/50 {
		replace nb_`stub' = nb_`stub'+1 if !missing(`stub'_`i')
	}
}

// Electoral college rounds: shares of the electoral votes obtained by the candidates
// (totals are stored as double: the default float type rounds large integers)
forvalues r=1/3 {
	egen double sum_E_Votes`r' = rowtotal(E_Votes`r'_*)
	forvalues i=1/50 {
		replace E_Vote_Share`r'_`i' = 100*(E_Votes`r'_`i' / sum_E_Votes`r') if nb_E_Vote_Share`r'==0 & nb_E_Votes`r'!=0
	}
}

// Popular vote rounds: shares of the votes obtained by the candidates
forvalues r=1/2 {
	egen double sum_Votes`r' = rowtotal(Votes`r'_*)
	forvalues i=1/50 {
		replace Vote_Share`r'_`i' = 100*(Votes`r'_`i' / sum_Votes`r') if nb_Votes`r'!=0 & nb_Vote_Share`r'==0
	}
}

// Removing the helper variables
foreach stub of local counted {
	drop nb_`stub'
}
drop sum_E_Votes1 sum_E_Votes2 sum_E_Votes3 sum_Votes1 sum_Votes2

// Ordering candidates: in some election results, "candidates" are actually entries such as "others"
// or "Against all". They are ranked after the regular candidates, and empty slots come last.

// Flags are set aside during the reshape and merged back at the end
preserve
keep Country Year Month Type_Election flag_*
tempfile flags
save `flags'
restore

drop flag_*
sort Country Year Month

// One row per election and candidate slot
local cand_stubs Candidate_ Party_ Votes1_ Votes2_ Vote_Share1_ Vote_Share2_ E_Votes1_ E_Votes2_ E_Votes3_ E_Vote_Share1_ E_Vote_Share2_ E_Vote_Share3_
reshape long `cand_stubs', i(Country Year Month Type_Election Date Source) j(oldid)

// Rank of the entry type: 0 for regular candidates, 96 to 99 for special entries, 100 for empty slots.
// When a name matches several rules, the first matching rule below applies.
gen cand_rank = cond(strpos(Candidate_,"vote") > 0 | strpos(Candidate_,"Vote") > 0, 96, ///
	cond(strpos(Candidate_,"Other") > 0 | strpos(Candidate_,"other") > 0, 97, ///
	cond(strpos(Candidate_,"Against") > 0 | strpos(Candidate_,"against") > 0, 98, ///
	cond(Candidate_=="Dispersed" | Candidate_=="N/A" | Candidate_=="None of the above", 99, ///
	cond(Candidate_ == "", 100, 0)))))

// Within an election: entry type first, then decreasing results starting from the latest round
// (electoral college before popular vote), and the candidate name as a last resort
gsort Country Year Month cand_rank -E_Vote_Share3_ -E_Votes3_ -E_Vote_Share2_ -E_Votes2_ -E_Vote_Share1_ -E_Votes1_ -Vote_Share2_ -Votes2_ -Vote_Share1_ -Votes1_ Candidate_
drop cand_rank

// The new candidate number replaces the original one
by Country Year Month : gen id=_n
drop oldid
reshape wide `cand_stubs', i(Country Year Month Type_Election Date Source) j(id)

merge 1:1 Country Year Month Type_Election using `flags', assert(match) nogen

// We remove indirect elections for which manipulation is possible: an indirect election is kept
// only if it is marked as to be included in the Excel spreadsheet documenting these elections
tempfile temp
save `temp'

import excel "$project_path/data/1_input/elections/Indirect elections/indirect_elections_to_include.xlsx", firstrow clear
keep Country Year Month to_include
rename to_include indirect_to_include

// Every election listed in the spreadsheet must exist in the dataset
merge 1:1 Country Year Month using `temp', assert(using match) nogen
keep if flag_indirect!=1 | indirect_to_include==1

// Labeling variables

// Election-level variables
label variable Country "Country in which the election took place"
label variable Year "Year of the election"
label variable Month "Month of the election"
label variable Type_Election "Election type (“Presidential”)"
label variable Source "Election results source"

// Flags
label variable flag_wrong_sum_share "Sum of voter shares for all candidates is inconsistent"
label variable flag_two_round "Flags elections with two rounds"
label variable flag_inconsequential "Flags cancelled or inconsequential elections"
label variable flag_inconsequential_note "Reason why flag_inconsequential is equal to 1"
label variable flag_coup "Flags elections that were shortly followed by a coup or revolution"
label variable flag_indirect "Flags an indirect election"
label variable flag_plebiscite "Flags elections that took the form of a plebiscite or referendum"
label variable flag_unopposed "Flags elections in which the incumbent was reelected unopposed"
label variable flag_not_last "Election which was not the last of its type in a given year"

// Candidate-level variables (one set per candidate slot); vote shares are stored as double
forvalues i=1/50 {
	local cand "candidate n°`i'"
	label variable Candidate_`i' "Name of `cand'"
	label variable Party_`i' "Party of `cand'"
	label variable Votes1_`i' "Number of votes for `cand' during the first round"
	label variable Votes2_`i' "Number of votes for `cand' during the second round"
	label variable Vote_Share1_`i' "Vote share for `cand' during the first round"
	label variable Vote_Share2_`i' "Vote share for `cand' during the second round"
	label variable E_Votes1_`i' "Number of electoral college votes for `cand' during the first round"
	label variable E_Votes2_`i' "Number of electoral college votes for `cand' during the second round"
	label variable E_Votes3_`i' "Number of electoral college votes for `cand' during the third round "
	label variable E_Vote_Share1_`i' "Electoral college vote share for `cand' during the first round"
	label variable E_Vote_Share2_`i' "Electoral college vote share for `cand' during the second round"
	label variable E_Vote_Share3_`i' "Electoral college vote share for `cand' during the third round"
	recast double Vote_Share1_`i' Vote_Share2_`i' E_Vote_Share1_`i' E_Vote_Share2_`i' E_Vote_Share3_`i'
}

// Final layout and saving of the output

// Months are displayed by name
label define month_label ///
	1 "January" 2 "February" 3 "March" 4 "April" 5 "May" 6 "June" ///
	7 "July" 8 "August" 9 "September" 10 "October" 11 "November" 12 "December", replace
label values Month month_label

// Identifiers first, flags last
order Country Year Month Type_Election Date Source, first
order flag_two_round flag_not_last flag_inconsequential flag_inconsequential_note flag_coup flag_plebiscite flag_unopposed flag_indirect flag_wrong_sum_share, last

// Display widths of the string variables
format Country flag_inconsequential_note %20s
format Type_Election %15s
format Candidate_* Party_* %25s

compress
label data "Presidential elections database -- Marx, Pons, and Rollet (2024)"
save "$project_path/data/3_cleaned/presidential_elections", replace

